# Set the working directory so that the relative "China nuclear opinion/..."
# paths passed to read_csv() later in this script resolve.
# NOTE(review): this is a machine-specific location; the script only runs
# as-is where the data lives under ~/Dropbox.
setwd("~/Dropbox")

library(readr)
library(quanteda)
library(tidyverse)
library(rvest)    
library(stringr)   
library(topicmodels)
library(knitr)
library(pdftools)
library(stm) 
library(xtable)
library(readtext)
library(tidytext)
library(spacyr)
library(extrafont)
library(RColorBrewer)
library(tinytex)
# Read the covariates file, report the PCRE build configuration, and print a
# per-column summary of what was loaded.
nuke_cov <- read_csv(file = "China nuclear opinion/Nukescovariates.csv")
pcre_config()
summary(object = nuke_cov)

# Bar charts of responses ----
# One chart per survey item: the x axis is the response value and the bar
# height is the count of rows with that value.
par(family = "Arial Unicode MS")

ggplot(nuke_cov, aes(x = wmd_1)) +
  geom_bar(width = 0.8) +
  labs(
    x = "China\'s nuclear weapons are a liability",
    y = "Number of Responses"
  ) +
  theme_bw()

ggplot(nuke_cov, aes(x = wmd_2)) +
  geom_bar(width = 0.8) +
  labs(
    x = "Nuclear weapons benefit Chinese security",
    y = "Number of Responses"
  ) +
  theme_bw()

ggplot(nuke_cov, aes(x = wmd_3)) +
  geom_bar(width = 0.8) +
  labs(
    x = "Countries should never use nuclear weapons in warfare",
    y = "Number of Responses"
  ) +
  theme_bw()

ggplot(nuke_cov, aes(x = nukes_reordered)) +
  geom_bar(width = 0.8) +
  labs(
    x = "How important is it for China to have nuclear weapons",
    y = "Number of Responses"
  ) +
  theme_bw()

# Summary stats ----
# Frequency counts for each of the four survey items.
# NOTE(review): tabulate() only counts positive integer codes and silently
# skips anything else -- presumably these columns are integer-coded
# responses; confirm against the CSV.
tabulate(nuke_cov[["nukes_reordered"]])
tabulate(nuke_cov[["wmd_1"]])
tabulate(nuke_cov[["wmd_2"]])
tabulate(nuke_cov[["wmd_3"]])

# Age subgroup analysis ----
# Fix the order of the age groups (Young, Middle, Older), then redraw each
# response chart with the bars stacked by age group in greyscale.
nuke_cov$age_cat <- factor(
  nuke_cov$age_cat,
  levels = c("Young", "Middle", "Older")
)
summary(nuke_cov$age_cat)

ggplot(nuke_cov, aes(x = wmd_1, fill = age_cat)) +
  geom_bar(width = 0.8, position = position_stack(reverse = TRUE)) +
  labs(
    x = "China\'s nuclear weapons are a liability",
    y = "Number of Responses",
    fill = "Age Group"
  ) +
  scale_fill_brewer(palette = "Greys") +
  theme_bw()

ggplot(nuke_cov, aes(x = wmd_2, fill = age_cat)) +
  geom_bar(width = 0.8, position = position_stack(reverse = TRUE)) +
  labs(
    x = "Nuclear weapons benefit Chinese security",
    y = "Number of Responses",
    fill = "Age Group"
  ) +
  scale_fill_brewer(palette = "Greys") +
  theme_bw()

ggplot(nuke_cov, aes(x = wmd_3, fill = age_cat)) +
  geom_bar(width = 0.8, position = position_stack(reverse = TRUE)) +
  labs(
    x = "Countries should never use nuclear weapons in warfare",
    y = "Number of Responses",
    fill = "Age Group"
  ) +
  scale_fill_brewer(palette = "Greys") +
  theme_bw()

ggplot(nuke_cov, aes(x = nukes_reordered, fill = age_cat)) +
  geom_bar(width = 0.8, position = position_stack(reverse = TRUE)) +
  labs(
    x = "How important is it for China to have nuclear weapons",
    y = "Number of Responses",
    fill = "Age Group"
  ) +
  scale_fill_brewer(palette = "Greys") +
  theme_bw()

# Text analysis ----
# Load the text data, then attach the covariates to each row by matching on
# doc_id. Only doc_ids present in both tables survive the merge.
nukes <- read_csv(file = "China nuclear opinion/Nukestext.csv")
pcre_config()
summary(object = nukes)

nukemerge <- merge(x = nukes, y = nuke_cov, by = "doc_id")
summary(object = nukemerge)

# Chinese stop words: two extra terms plus the "misc" source list for "zh".
ch_stop1 <- c("一个", "一种", stopwords("zh", source = "misc"))

# Build the corpus from the merged data frame. corpus() on a data frame keeps
# every non-text column as a document variable, so the covariates are already
# attached; the data-frame method has no `docvars` argument, and passing one
# only produced an "argument is not used" warning.
nukescorpus <- corpus(nukemerge)
summary(nukescorpus)

# Tokenise, drop stop words, then build the document-feature matrix.
# Handing a corpus plus `remove =` / `remove_punct =` straight to dfm() is
# deprecated in quanteda 3 and no longer supported in quanteda 4; the explicit
# tokens() pipeline applies the same punctuation, number and stop-word
# removal.
dfmnukes <- nukescorpus %>%
  tokens(remove_punct = TRUE, remove_numbers = TRUE) %>%
  tokens_remove(pattern = ch_stop1) %>%
  dfm()
summary(dfmnukes)
topfeatures(dfmnukes)

# Make sure the age covariate on the merged data is a factor, then set the
# base-graphics font and margins for subsequent base plots.
nukemerge[["age_cat"]] <- factor(nukemerge[["age_cat"]])
par(
  family = "Arial Unicode MS",   # set font
  mar = c(4.1, 11.5, 5.1, 2.1)   # set margins (bottom, left, top, right)
)

# Topic model ----
# Fit an 8-topic structural topic model with age group as the prevalence
# covariate, estimate the effect of age group on each topic, and plot the
# estimates for all topics and for three topic subsets.
nukes8age <- stm(
  documents = dfmnukes,
  K = 8,
  data = docvars(nukescorpus),
  prevalence = ~nukemerge$age_cat
)
summary(nukes8age)
enukes8age <- estimateEffect(
  1:8 ~ age_cat,
  stmobj = nukes8age,
  metadata = nukemerge
)

# Names assigned to topics 1-8, in topic order. Every plot label is derived
# from this single vector so the subset plots cannot drift out of step with
# the full plot (topic 4 = Strength, topic 7 = Ensuring National Security).
topic_names <- c(
  "Deterring Great Powers",
  "Rights and Voice",
  "Territorial Interference",
  "Strength",
  "Country and Nation",
  "Self Defense",
  "Ensuring National Security",
  "Technology and Prestige"
)

# Age groups in the order the custom labels are laid out.
# NOTE(review): presumably this matches the order in which
# plot.estimateEffect() draws the covariate levels -- confirm on the plot.
label_ages <- c("Middle", "Young", "Older")

# Build "<topic name>: <age group>" labels for the given topic numbers: all
# requested topics for the first age group, then the second, then the third.
age_topic_labels <- function(topics) {
  as.vector(outer(topic_names[topics], label_ages, paste, sep = ": "))
}

plot(
  enukes8age,
  covariate = "age_cat",
  labeltype = "custom",
  custom.labels = age_topic_labels(1:8),
  width = 55
)
plot(
  enukes8age,
  covariate = "age_cat",
  topics = c(2, 6),
  xlim = c(.012, .3),
  labeltype = "custom",
  custom.labels = age_topic_labels(c(2, 6)),
  width = 35
)
plot(
  enukes8age,
  covariate = "age_cat",
  topics = c(4, 7, 8),
  xlim = c(.012, .3),
  labeltype = "custom",
  custom.labels = age_topic_labels(c(4, 7, 8)),
  width = 35
)
plot(
  enukes8age,
  covariate = "age_cat",
  topics = c(1, 3, 5),
  xlim = c(0.012, .3),
  labeltype = "custom",
  custom.labels = age_topic_labels(c(1, 3, 5)),
  width = 35
)

# Alternate specifications ----
# Refit the model with K = 6, 7, 9 and 10 topics, keeping the same documents
# and age-group prevalence covariate, and print the summary of each fit.
nukes6age <- stm(
  documents = dfmnukes,
  K = 6,
  data = docvars(nukescorpus),
  prevalence = ~nukemerge$age_cat
)
summary(nukes6age)

nukes7age <- stm(
  documents = dfmnukes,
  K = 7,
  data = docvars(nukescorpus),
  prevalence = ~nukemerge$age_cat
)
summary(nukes7age)

nukes9age <- stm(
  documents = dfmnukes,
  K = 9,
  data = docvars(nukescorpus),
  prevalence = ~nukemerge$age_cat
)
# Summarise the K = 9 fit itself (not the 8-topic model fitted earlier).
summary(nukes9age)

nukes10age <- stm(
  documents = dfmnukes,
  K = 10,
  data = docvars(nukescorpus),
  prevalence = ~nukemerge$age_cat
)
summary(nukes10age)
